# clear environment
rm(list=ls())

# load packages
library(openxlsx)
library(ggplot2)

# load data set with organization keyword counts and crime numbers
dat = openxlsx::read.xlsx("vsberichte_metadata.xlsx", sheet = 1)

# binary varibable indicating whether interior minister is from a center-right party
dat$cr_intmin <- ifelse(dat$intmin_party == "cdu" | dat$intmin_party== "csu" | dat$intmin_party=="fdp", 1, ifelse(dat$intmin_party=="pro", NA, 0))

# remove those without center-right or center-left interior minister
dat <- dat[!is.na(dat$cr_intmin),]

# logged ratio of organization keywords in chapters on right-wing extremism and left-wing extremism
dat$ratio_log_orga <- log(dat$count_orga_kw_right_chapter + 0.5) - log(dat$count_orga_kw_left_chapter + 0.5)

# ratio in number of words
dat$rwe_lwe_words_ratio <- dat$count_words_right_chapter / dat$count_words_left_chapter

# ratio in crimes
dat$rwe_lwe_crime_ratio <- dat$extreme_crime_right / dat$extreme_crime_left

# create decade indicator
dat$decade <- NA
dat$decade[dat$year<1960]<-1950
dat$decade[dat$year>=1960 & dat$year<1970]<-1960
dat$decade[dat$year>=1970 & dat$year<1980]<-1970
dat$decade[dat$year>=1980 & dat$year<1990]<-1980
dat$decade[dat$year>=1990 & dat$year<2000]<-1990
dat$decade[dat$year>=2000 & dat$year<2010]<-2000
dat$decade[dat$year>=2010 & dat$year<2020]<-2010
dat$decade[dat$year>=2020 & dat$year<2030]<-2020
dat <- dat[dat$year>1950,]

# merge in polling data
polbar <- readstata13::read.dta13("polbar_agg.dta")
polbar$jurisdiction <- tolower(polbar$jurisdiction)
dat$jurisdiction <- gsub("ü", "u", dat$jurisdiction)
dat <- merge(dat, polbar, by.x=c("jurisdiction", "year_pub"), by.y=c("jurisdiction", "year"), all.x=T)

# imputation
dat_mice <- dat[!is.na(dat$ratio_log_orga) & !is.na(dat$cr_intmin),
                c("ratio_log_orga", "rwe_lwe_crime_ratio", "cr_intmin", "jurisdiction", "year",
                  "pmk_right", "pmk_left", "extreme_crime_violent_right", "extreme_crime_violent_left",
                  "person_right", "person_left", "vshead_party", "extreme_crime_islamist", "person_violent_islamist",
                  "extreme_crime_left", "extreme_crime_right", "rwe_lwe_words_ratio", "polbar_fr_vote_pct", "elec_fr_vote_pct")]
dat_imputed <- mice::mice(dat_mice, m=20, maxit = 50, method = 'pmm', seed = 500)

# regression models
mod_1<-lm(ratio_log_orga~factor(cr_intmin), data=dat)
mod_2<-lm(ratio_log_orga~factor(cr_intmin)+factor(jurisdiction), data=dat)
mod_3<-lm(ratio_log_orga~factor(cr_intmin)+factor(jurisdiction)+factor(decade), data=dat)
mod_4<-lm(ratio_log_orga~factor(cr_intmin)+factor(jurisdiction)+factor(year), data=dat)
mod_5<-lm(ratio_log_orga~factor(cr_intmin)+rwe_lwe_words_ratio+factor(jurisdiction)+factor(year), data=dat)
mod_6<-mice::pool(with(dat_imputed, exp=lm(ratio_log_orga~factor(cr_intmin)+rwe_lwe_words_ratio+rwe_lwe_crime_ratio+factor(jurisdiction)+factor(year))))
